clear all
set more off
set mem 10000000
set maxvar 120000
set matsize 10000
version 15

********************************************************************************
*** Script to compile inputs for expenditure-based cost-benefit calculations ***
********************************************************************************

** Set file paths
do "$path_code/paths.do"

** Set graph scheme
cd "$path/code/analyze"
set scheme fb, perm

****************************************************************** 
****************************************************************** 

use "$panel/panel_dataset_full.dta", clear
drop hpca11* vd*11
global disc = "0.05 0.10 0.15" // list of discount rates
global pop = "300 1000 2000" // considering villages of 3 population sizes
set seed 19831111

	// Keep villages with 2011 populations with 50 people of 300, 1000, 2000
gen pop_bucket = .
foreach pop in $pop {
	replace pop_bucket = `pop' if inrange(tot_p11,`pop'-50,`pop'+50)
}
drop if pop_bucket==.	
	
	// Calculate implied annual population growth rate
gen p_growth = (tot_p11/tot_p)^(1/10)-1
gen p_growth_median = .
foreach pop in $pop {
	sum p_growth if pop_bucket==`pop', detail	
	replace p_growth_median = r(p50) if pop_bucket==`pop'
}

	// Calculate number of households 
gen hh_size	= tot_p11/no_hh11
gen median_hh_size = .
foreach pop in $pop {
	sum hh_size if pop_bucket==`pop', detail	
	replace median_hh_size = round(r(p50)) if pop_bucket==`pop'
}
	// all are very close to 5; we'll assume a uniform household size of 5 people
assert median_hh_size==5	
	
	// Keep only populations, households, and buckets
keep pop_bucket p_growth_median	median_hh_size
duplicates drop
gen initial_number_hh = pop_bucket/median_hh_size	
la var pop_bucket "Assumed village population at the time of electrification"
la var p_growth_median "Assumed annual population growth rate (from 2001/2011 Census)"
la var median_hh_size "Assumed median household size (from 2011 Census)"
la var initial_number_hh "Initial number of households in village"

	
	// Grab 2008-to-2010 rupee conversion rate
preserve 
import excel "$nss/INDCPIALLAINMEI.xls", sheet("FRED Graph") cellrange(A11:B33) firstrow clear
gen year = year(observation_date)
sum IND if year==2010
local cpi2010 = r(mean)
gen INDdef = IND/`cpi2010'
sum INDdef if year==2008
global cpi2008 = r(mean)
restore
	
	// Per-household costs (based on 2200 Rs per BPL household, in 2008 rupees)
gen hh_costs = initial_number_hh * 2200 / $cpi2008
la var hh_costs "Total variable costs of electrification"
drop initial_number_hh

	// Fixed costs (from World Bank report, Table 5.1, p51)
gen FC_lo = 1300000 / $cpi2008
gen FC_hi = 1800000 / $cpi2008
la var FC_lo "Fixed costs of electrificaiton: low"
la var FC_hi "Fixed costs of electrificaiton: high"


	// Bring in SHRUG fuzzy RD robust results
preserve 

use "$results/RDROBUST_outcomes_fuzzy_shrug.dta", clear

sum beta_robust if fuzzy_step==1 & fuzzyvar=="vdp_pwr_h_com_avg_11" & yvar=="secc11_rural_cons_pc"	
assert r(N)==1
local beta_hours = r(mean)

sum se_robust if fuzzy_step==1 & fuzzyvar=="vdp_pwr_h_com_avg_11" & yvar=="secc11_rural_cons_pc"	
assert r(N)==1
local se_hours = r(mean)

sum uci_robust if fuzzy_step==1 & fuzzyvar=="vdp_pwr_h_com_avg_11" & yvar=="secc11_rural_cons_pc"	
assert r(N)==1
local uci_hours = r(mean)

sum beta_robust if fuzzy_step==1 & fuzzyvar=="lights_max2011_hat" & yvar=="secc11_rural_cons_pc"	
assert r(N)==1
local beta_lights = r(mean)

sum se_robust if fuzzy_step==1 & fuzzyvar=="lights_max2011_hat" & yvar=="secc11_rural_cons_pc"	
assert r(N)==1
local se_lights = r(mean)

sum uci_robust if fuzzy_step==1 & fuzzyvar=="lights_max2011_hat" & yvar=="secc11_rural_cons_pc"	
assert r(N)==1
local uci_lights = r(mean)

restore

	// UNITS: MONTHLY CONSUMPTION PER CAPITA, in 2011 rupees
gen beta_hours = `beta_hours'*12 // inflate to annual consumption
gen se_hours = `se_hours'*12 // inflate to annual consumption
gen uci_hours = `uci_hours'*12 // inflate to annual consumption
gen beta_lights = `beta_lights'*12 // inflate to annual consumption
gen se_lights = `se_lights'*12 // inflate to annual consumption
gen uci_lights = `uci_lights'*12 // inflate to annual consumption
	
	// Grab 2011-to-2010 rupee conversion rate
preserve 
import excel "$nss/INDCPIALLAINMEI.xls", sheet("FRED Graph") cellrange(A11:B33) firstrow clear
gen year = year(observation_date)
sum IND if year==2010
local cpi2010 = r(mean)
gen INDdef = IND/`cpi2010'
sum INDdef if year==2011
global cpi2011 = r(mean)
restore
replace beta_hours = beta_hours / $cpi2011
replace se_hours = se_hours / $cpi2011
replace uci_hours = uci_hours / $cpi2011
replace beta_lights = beta_lights / $cpi2011
replace se_lights = se_lights / $cpi2011
replace uci_lights = uci_lights / $cpi2011
la var beta_hours "Rescaled fuzzy RD point estimate, hours of commercial power"
la var beta_lights "Rescaled fuzzy RD point estimate, 2011 brightness"
la var se_hours "SE on rescaled fuzzy RD point estimate, hours of commercial power"
la var se_lights "SE on rescaled fuzzy RD point estimate, 2011 brightness"
	
	// Bring in NSS IV results
preserve 
use "$results/nss_reg_results.dta", clear
keep if panel=="district-year collapsed" & regs=="iv" & yvar=="mth_pc_expE1" & inlist(ytag,"_dec12","_dec30")
keep if inlist(fes, "year c.year#exp05_st_4ile c.year#exp05_ntl_10ile stdt")
keep if ifs==""
assert _N==2

sum beta if ytag=="_dec12" & fes=="year c.year#exp05_st_4ile c.year#exp05_ntl_10ile stdt"
assert r(N)==1
local beta_nss1 = r(mean)

sum se if ytag=="_dec12" & fes=="year c.year#exp05_st_4ile c.year#exp05_ntl_10ile stdt"
assert r(N)==1
local se_nss1 = r(mean)

sum ci95_hi if ytag=="_dec12" & fes=="year c.year#exp05_st_4ile c.year#exp05_ntl_10ile stdt"
assert r(N)==1
local uci_nss1 = r(mean)

sum nclust if ytag=="_dec12" & fes=="year c.year#exp05_st_4ile c.year#exp05_ntl_10ile stdt"	
assert r(N)==1
local dof_nss1 = r(mean)

sum beta if ytag=="_dec30" & fes=="year c.year#exp05_st_4ile c.year#exp05_ntl_10ile stdt"
assert r(N)==1
local beta_nss2 = r(mean)

sum se if ytag=="_dec30" & fes=="year c.year#exp05_st_4ile c.year#exp05_ntl_10ile stdt"
assert r(N)==1
local se_nss2 = r(mean)

sum ci95_hi if ytag=="_dec30" & fes=="year c.year#exp05_st_4ile c.year#exp05_ntl_10ile stdt"	
assert r(N)==1
local uci_nss2 = r(mean)

sum nclust if ytag=="_dec30" & fes=="year c.year#exp05_st_4ile c.year#exp05_ntl_10ile stdt"	
assert r(N)==1
local dof_nss2 = r(mean)

restore

	// UNITS: MONTHLY CONSUMPTION PER CAPITA, in 2010 rupees
gen beta_nss1 = `beta_nss1'*12 // inflate to annual consumption
gen se_nss1 = `se_nss1'*12 // inflate to annual consumption
gen uci_nss1 = `uci_nss1'*12 // inflate to annual consumption
gen dof_nss1 = `dof_nss1'
gen beta_nss2 = `beta_nss2'*12 // inflate to annual consumption
gen se_nss2 = `se_nss2'*12 // inflate to annual consumption
gen uci_nss2 = `uci_nss2'*12 // inflate to annual consumption
gen dof_nss2 = `dof_nss2'
la var beta_nss1 "Rescaled 2SLS point estimate, Q1"
la var beta_nss2 "Rescaled 2SLS point estimate, Q25"
la var se_nss1 "SE on rescaled 2SLS point estimate, Q1"
la var se_nss2 "SE on rescaled 2SLS point estimate, Q25"

	// Reshape dataset wide to prepares for boostraps
gen bs = 1
reshape wide p_growth_median median_hh_size hh_costs, i(bs) j(pop_bucket)
global NBOOT = 10000
set obs $NBOOT
replace bs = _n
foreach v of varlist p_growth_median* median_hh_size* hh_costs* FC* beta* se* uci* dof* {
	replace `v' = `v'[1]
}	

	// Expand to 20 years
expand 20, gen(temp_new)
sort bs temp_new
gen t = 1 if temp_new==0
replace t = t[_n-1]+1 if temp_new==1 & bs==bs[_n-1]
la var t "Years since electrificaiton (counting from t=1)"
unique bs t
assert r(unique)==r(N)
drop temp_new

	// Apply population growth
foreach pop in $pop {
    gen pop`pop'g = `pop'*(1+p_growth_median`pop')^(t-1)
	la var pop`pop'g "`pop'-person village's population in year t, assuming population growth"
}

	// Generate bootstrap draws for each village (1 draw per village)
foreach iv in hours lights nss1 nss2 {

	// set scaling factor for LATEs
	local scale = ""
	if "`iv'"=="hours" {
		local scale = 10 // inflate from 1 to 10 hours per day
	}
	else if "`iv'"=="lights" {
		local scale = 2.6 // inflate from 1 to 2.6 brightness units
	}
	else {
	    local scale = 1 // NSS estiamtes are already in units of 0 to full electrification
	}
	
	// random draw for each village
	if inlist("`iv'","hours","lights") {
		gen beta_bs_`iv' = rnormal(beta_`iv',se_`iv')*`scale' if t==1
		la var beta_bs_`iv' "Bootstrap draw for a 300-person village, `iv' IV "
	}
	else if "`iv'"=="nss1" {
		gen beta_bs_`iv' = (rt(dof_`iv'-1)*se_`iv' + beta_`iv')*`scale' if t==1
		la var beta_bs_`iv' "Bootstrap draw for a 1000-person village, `iv' IV"
	}
	else if "`iv'"=="nss2" {
		gen beta_bs_`iv' = (rt(dof_`iv'-1)*se_`iv' + beta_`iv')*`scale' if t==1
		la var beta_bs_`iv' "Bootstrap draw for a 2000-person village, `iv' IV"
	}
}	

	// Confirm that sampling distibutions are calibrated appropriately
count if beta_bs_lights<uci_lights*2.6 & t==1
di r(N)/10000
count if beta_bs_hours<uci_hours*10 & t==1
di r(N)/10000
count if beta_bs_nss1<uci_nss1 & t==1
di r(N)/10000
count if beta_bs_nss2<uci_nss2 & t==1
di r(N)/10000
drop uci* dof*

	// Kernel densities of draws
count if beta_bs_hours>0 & bs!=. & t==1
local hours_pos = string(100*r(N)/10000,"%9.1f")
count if beta_bs_lights>0 & bs!=. & t==1
local lights_pos = string(100*r(N)/10000,"%9.1f")
count if beta_bs_nss1>0 & bs!=. & t==1
local nss1_pos = string(100*r(N)/10000,"%9.1f")
count if beta_bs_nss2>0 & bs!=. & t==1
local nss2_pos = string(100*r(N)/10000,"%9.1f")
		
twoway ///
	(kdensity beta_bs_hours if t==1, lc(navy) lw(medthick)) ///
	(kdensity beta_bs_lights if t==1, lc(midblue) lw(medthick)) ///
	(kdensity beta_bs_nss1 if t==1, lc(maroon) lw(medthick)) ///
	(kdensity beta_bs_nss2 if t==1, lc(orange_red) lw(medthick)) ///
	, ///
	legend(order(1 "SHRUG, hours of power (`hours_pos'% > 0)      " 3 "NSS, quintile 1 (`nss1_pos'% > 0)"  ///
	             2 "SHRUG, brightness (`lights_pos'% > 0)      " 4 "NSS, quintiles 2-5 (`nss2_pos'% > 0)") pos(6) col(2) symxsize(8)) ///
	xlabel(-30000 "-30" -20000 "-20" -10000 "-10" 0 10000 "10" 20000 "20" 30000 "30" 40000 "40") ///
	yscale(lstyle(none)) ylabel(none) ///
	xtitle("Change in annual consumption expenditure per capita") ///
	aspect(0.3)
graph export "$results/expenditure_sampling_distributions.pdf", replace
	
	
	// fill forward bootstrap draws
foreach v of varlist beta_bs* {
    replace `v' = `v'[_n-1] if t==t[_n-1]+1 & bs==bs[_n-1]
	assert `v'!=.
}	

	// Disconted benefits in year t
foreach iv in hours lights nss1 nss2 {
	if "`iv'"=="nss1" {
	    local pop = 1000
	}
	else if "`iv'"=="nss2" {
	    local pop = 2000
	}
	else {
	    local pop = 300
	}
	foreach disc in $disc {	
		local d = string(`disc'*100,"%9.0f")
			
		// discounted benefits in year t, with no population growth
		gen B_`iv'_`d'_`pop' = (beta_bs_`iv' * `pop') / ((1+`disc')^(t-1))
		la var B_`iv'_`d'_`pop' "Discounted benefits in year t w/o pop growth: IV=`iv', disc=`d'%, pop=`pop'"
		
		// discounted benefits in year t, with population growth
		gen B_`iv'_`d'_`pop'g = (beta_bs_`iv' * pop`pop'g) / ((1+`disc')^(t-1))
		la var B_`iv'_`d'_`pop'g "Discounted benefits in year t w/ pop growth: IV=`iv', disc=`d'%, pop=`pop'"
	}
}
	
	// Save full dataset, before collapsing and calculating NPVs/ROIs
la var bs "Bootstrap iteration"
compress 
save "$results/roi_results_expenditure_bootstraps_full.dta", replace


***
***	

	// Reload bootstrap draws and discounted benefits
use "$results/roi_results_expenditure_bootstraps_full.dta", clear
	
	// Drop bootstrap draws and sum across years
drop beta_bs* t pop*g	
foreach v of varlist B_* {
    egen PDV20_`v' = sum(`v'), by(bs)
	local lab: variable label `v'
	local lab = subinstr("`lab'","Discounted benefits in year t","20-year PDV of benefits",1)
	la var PDV20_`v' "`lab'"
	drop `v'
}
duplicates drop
unique bs
assert r(unique)==r(N)

	// Subtract off costs to get full NPV and calculate ROI
foreach iv in hours lights nss1 nss2 {
	if "`iv'"=="nss1" {
	    local pop = 1000
	}
	else if "`iv'"=="nss2" {
	    local pop = 2000
	}
	else {
	    local pop = 300
	}
	foreach disc in $disc {
	    local d = string(`disc'*100,"%9.0f")
			foreach fc in lo hi {
				
				// discounted benefitis minus upfront costs
				gen NPV20_`iv'_`d'_`fc'_`pop' = PDV20_B_`iv'_`d'_`pop' - hh_costs`pop' - FC_`fc'
				gen NPV20_`iv'_`d'_`fc'_`pop'g = PDV20_B_`iv'_`d'_`pop'g - hh_costs`pop' - FC_`fc'
				la var NPV20_`iv'_`d'_`fc'_`pop' "20-year NPV w/o pop growth: FC=`fc', IV=`iv', disc=`d'%, pop=`pop'"
				la var NPV20_`iv'_`d'_`fc'_`pop'g "20-year NPV w/ pop growth: FC=`fc', IV=`iv', disc=`d'%, pop=`pop'"

				// ROI
				gen ROI20_`iv'_`d'_`fc'_`pop' = NPV20_`iv'_`d'_`fc'_`pop' / ( hh_costs`pop' + FC_`fc' )
				gen ROI20_`iv'_`d'_`fc'_`pop'g = NPV20_`iv'_`d'_`fc'_`pop'g / ( hh_costs`pop' + FC_`fc' )
				la var ROI20_`iv'_`d'_`fc'_`pop' "20-year ROI w/o pop growth: FC=`fc', IV=`iv', disc=`d'%, pop=`pop'"
				la var ROI20_`iv'_`d'_`fc'_`pop'g "20-year ROI w/ pop growth: FC=`fc', IV=`iv', disc=`d'%, pop=`pop'"
		}
	}
}	

	// Assess ROIs
sum ROI20*300, detail	
sum ROI20*300g, detail	
sum ROI20*1000, detail	
sum ROI20*1000g, detail	
sum ROI20*2000, detail	
sum ROI20*2000g, detail	

	// Save
compress
save "$results/roi_results_expenditure_bootstraps.dta", replace


****************************************************************** 
****************************************************************** 

